* ---- Session setup ----
* Close any log left open by a previous run, empty memory, then open a fresh text log.
capture log close
drop _all
set mem 3g
set more off
set logtype text
log using world-descriptive, replace

* This do-file:
* a) calculates the statistics quoted in the text of the "data" and "descriptive statistics" sections
* b) produces the following graphs:
* 1) Trends (pooled across countries) in:
* i) imr, ii) mother's height, iii) log GDP (lgdp)
* 2) Country-specific trends in i), ii) and iii) above - cutting the data so that we can see trends more clearly
* 3) child mortality, fertility (not for the paper but for my own interest) and education statistics, for all women, separately for tall/short women, and by country.


use world_child3

** Restrict the sample to rows flagged infant_exp==1 (children with full exposure)

tabulate infant_exp
drop if infant_exp != 1

** Rescale height: height100 is height divided by 100

generate height100 = height / 100

* Descriptives

* nchildren = number of rows sharing the same caseid2, stored on every row of the group
by caseid2, sort: generate nchildren = _N
tabulate nchildren

* Consecutive integer codes for each distinct id2 / caseid2 / caseid3;
* the maximum shown by -summarize- is the number of distinct groups.
egen survey_no = group(id2)
egen mother_no = group(caseid2)
egen child_no  = group(caseid3)

summarize survey_no mother_no child_no

* Weighted pooled means (includes the pooled IMR)
mean infant educfyrs height100 [pweight = sweight]

* Unweighted -summarize-, used only for the standard deviations
* (-summarize- does not accept pweights)
summarize infant educfyrs height100

* Year- and country-level IMR, height100 and educfyrs.
* infant is summarised over all rows and grouped by yearc; height100 and educfyrs
* are summarised over rows with bord==1 only (presumably one row per mother -
* verify) and grouped by yearm. Each table is built on a preserved copy so the
* child-level data are untouched afterwards.

* -- weighted means by yearc: listed sorted by IMR, then in year order
preserve
collapse (mean) infant height100 educfyrs [pweight = sweight], by(yearc)
sort infant
list
sort yearc
list infant height100 educfyrs
restore

* -- weighted means of mother traits by yearm: listed sorted by each trait in turn
preserve
collapse (mean) height100 educfyrs if bord == 1 [pweight = sweight], by(yearm)
sort educfyrs
list
sort height100
list
restore

* -- unweighted s.d. of infant by yearc
preserve
collapse (sd) infant, by(yearc)
sort infant
list
restore

* -- unweighted s.d. of mother traits by yearm
preserve
collapse (sd) educfyrs height100 if bord == 1, by(yearm)
sort height100
list
sort educfyrs
list
restore

* -- weighted mean IMR by country
preserve
collapse (mean) infant [pweight = sweight], by(country)
sort infant
list
restore

* -- weighted means of mother traits by country
preserve
collapse (mean) height100 educfyrs if bord == 1 [pweight = sweight], by(country)
sort height100
list
sort educfyrs
list
restore

* Dispersion across countries of infant, height100 and educfyrs.
* Country-by-year weighted means are computed first; -summarize- on a single
* year then gives the s.d. of those means across countries.
* Honduras is left out throughout because of an outlier.

preserve

collapse (mean) infant if country != "Honduras" [pweight = sweight], by(country yearc)

summarize infant if yearc == 1970 /* s.d. of country means across countries, yearc 1970 */
summarize infant if yearc == 2000 /* s.d. of country means across countries, yearc 2000 */

restore

preserve

* mother traits: rows with bord==1 only, grouped by yearm
collapse (mean) height100 educfyrs if bord == 1 & country != "Honduras" [pweight = sweight], by(country yearm)

summarize height100 educfyrs if yearm == 1950 /* s.d. of country means across countries, yearm 1950 */
summarize height100 educfyrs if yearm == 1980 /* s.d. of country means across countries, yearm 1980 */

restore

** (1)
** i) and ii): pooled (all-country) yearly weighted means of infant, educfyrs
** and height100, one connected-line figure per variable, each exported as a PNG

preserve
sort yearc

collapse (mean) infant educfyrs height100 [pweight = sweight], by(yearc)

* options common to the three pooled figures
local pooled_opts subtitle("38 Countries, 1970-2000") xtitle("Year") xlabel(1970(4)2000) legend(off)

graph twoway (connect infant yearc), ///
	title("IMR over Time") ytitle("IMR") `pooled_opts'
graph export infant-yearc.png, replace

graph twoway (connect educfyrs yearc), ///
	title("Average Education of Mothers over Time") ytitle("Average Education, years") `pooled_opts'
graph export educfyrs-yearc.png, replace

graph twoway (connect height100 yearc), ///
	title("Height of Mothers over Time") ytitle("Height, in metres") `pooled_opts'
graph export height100-yearc.png, replace

restore
preserve

** iii) pooled log GDP, drawn from penn.dta; the preserved child-level data
** come back at the -restore- that ends this section
use penn.dta, clear
drop if country=="Nigeria"

* rebuild countryid from the remaining country names and cross-tabulate as a check
drop countryid
sort country
egen countryid = group(country)
tab1 country countryid

* keep years strictly between 1969 and 2001 (missing yearc is dropped too)
drop if yearc <= 1969 | yearc >= 2001

collapse (mean) lgdp, by(yearc)
graph twoway connect lgdp yearc, ///
	title("Log GDP over Time") subtitle("38 Countries, 1970-2000") ///
	ytitle("Log GDP") xtitle("Year") xlabel(1970(4)2000) legend(off)
graph export lgdp-yearc.png, replace

restore
preserve


** 2) Country-specific trends in IMR, mother's height and mother's education.
** Works on country-by-year weighted means; the child-level data are restored
** at the end of the section.
collapse (mean) infant educfyrs height100 [pweight=sweight], by(countryid yearc)

* Linear time trend in IMR, fitted separately for each of the 38 countries.
* The -if countryid==- qualifier is what makes each pass country-specific;
* without it every iteration re-fits the same pooled regression.
gen trend=yearc-1969
forvalues c = 1/38 {
	reg infant trend if countryid==`c'
}

** country codes: 1 - BE, 2 - BR, 3 - BF, 4 - CR, 5 - CD, 6 - CM, 7 - CH, 8 - CB, 9 - CO, 10 - CI, 11 - DR, 12 - EG, 13 - ET, 14 - GB, 15 - GH, 16 - GU, 17 - HA, 18 - HO, 19 - IN, 20 - KE, 21 - LE, 22 - MD, 23 - MW, 24 - ML, 25 - MO, 26 - MZ, 27 - NB, 28 - NC, 29 - NG, 30 - PE, 31 - RW,  32 - SE, 33 - TZ, 34 - TO, 35 - TK, 36 - UG, 37 - ZB, 38 - ZW

* drop outlier for Honduras in 1970 (affects the graphs below, not the regressions above)

drop if yearc==1970 & countryid==18

* For each variable build the list of 38 per-country plots,
* (connect VAR yearc if countryid==1) ... (connect VAR yearc if countryid==38),
* in countryid order.
foreach v in infant height100 educfyrs {
	local lines_`v'
	forvalues c = 1/38 {
		local lines_`v' `lines_`v'' (connect `v' yearc if countryid==`c')
	}
}

* options common to the three all-country figures
local trend_opts subtitle("Country-Specific Trends, 1970-2000") xtitle("Year") xlabel(1970(4)2000) legend(off)

graph twoway `lines_infant', ///
	title("IMR over Time") ytitle("IMR") `trend_opts'
graph export infant-yearc-allcountries.png, replace

graph twoway `lines_height100', ///
	title("Height of Mothers over Time") ytitle("Height, in m") `trend_opts'
graph export height100-yearc-allcountries.png, replace

graph twoway `lines_educfyrs', ///
	title("Average Education of Mothers over Time") ytitle("Education, in years") `trend_opts'
graph export educfyrs-yearc-allcountries.png, replace

restore

use penn.dta, clear

** descriptive stats
* average gdp growth for each country over period
* average growth across countries each year
* average gdp for each country over the whole period - so we can calculate ratios
* NOTE(review): this reload uses penn.dta as stored - Nigeria is not dropped,
* countryid is not rebuilt and years are not restricted here. Confirm that the
* stored countryid matches the two-letter codes used in the legends below.

* pooled trend: regress the yearly mean of lgdp on a linear trend (trend = 0 in 1979)
preserve
collapse (mean) lgdp, by(yearc)
generate trend = yearc - 1979
regress lgdp trend
restore

* country-by-year means, then one trend regression per countryid 1..38
sort countryid yearc
collapse (mean) lgdp, by(countryid yearc)

generate trend = yearc - 1979
* note: these regressions are not exported with outreg
forvalues c = 1/38 {
	regress lgdp trend if countryid == `c'
}

** graphs: the countries are split over three figures; within each figure the
** plots are drawn in the order of the id list, which the legend order() relies on
local gdp_opts title("Log GDP over Time") subtitle("1970-1997") xtitle("Year") ytitle("lgdp") xlabel(1970(4)1998)

* figure 1
local lines
foreach c of numlist 3 4 5 7 13 22 23 24 26 29 33 34 36 {
	local lines `lines' (connect lgdp yearc if countryid==`c')
}
graph twoway `lines', `gdp_opts' ///
	legend(order(1 "BF" 2 "CR" 3 "CD" 4 "CH" 5 "ET" 6 "MD" 7 "MW" 8 "ML" 9 "MZ" 10 "NG" 11 "TZ" 12 "TO" 13 "UG")) ///
	legend(row(2))
graph export lgdp-yearc-country1.png, replace

* figure 2
local lines
foreach c of numlist 1 9 10 15 16 17 18 19 20 21 31 32 37 {
	local lines `lines' (connect lgdp yearc if countryid==`c')
}
graph twoway `lines', `gdp_opts' ///
	legend(order(1 "BE" 2 "CO" 3 "CI" 4 "GH" 5 "GU" 6 "HA" 7 "HO" 8 "IN" 9 "KE" 10 "LE" 11 "RW" 12 "SE" 13 "ZB")) ///
	legend(row(2))
graph export lgdp-yearc-country2.png, replace

* figure 3
local lines
foreach c of numlist 2 6 8 11 12 14 25 27 28 30 35 38 {
	local lines `lines' (connect lgdp yearc if countryid==`c')
}
graph twoway `lines', `gdp_opts' ///
	legend(order(1 "BR" 2 "CM" 3 "CB" 4 "DR" 5 "EG" 6 "GB" 7 "MO" 8 "NB" 9 "NC" 10 "PE" 11 "TK" 12 "ZW")) ///
	legend(row(2))
graph export lgdp-yearc-country3.png, replace

clear
use world_child3

* 3) child-mortality, fertility (not for paper but my own interest), education stats, for all women, and separately for tall/short women, and by country
* The file is reloaded in full here: the infant_exp==1 restriction applied at the
* top of this do-file is not in force for the tables below.

* generate fertility variable: no numbers the rows within each caseid2 and
* fert is the largest such number, i.e. the count of rows for that caseid2
bysort caseid2: generate no = _n
summarize no
bysort caseid2: egen fert = max(no)
summarize fert

* Every table below is a weighted mean of infant and fert, built on a preserved
* copy of the data and listed in full.

* all women: by country, by country and year, by year
foreach grp in "country" "country yearc" "yearc" {
	preserve
	collapse (mean) infant fert if 1 [pweight = sweight], by(`grp')
	list
	restore
}

* height-group indicator variables, in the order the tables are produced
local hgroups tall1 tall2 tallhalf short1 short2 shorthalf

* one pooled row per height group
foreach g of local hgroups {
	preserve
	collapse (mean) infant fert if `g' == 1 [pweight = sweight]
	list
	restore
}

* by country within each height group
foreach g of local hgroups {
	preserve
	collapse (mean) infant fert if `g' == 1 [pweight = sweight], by(country)
	list
	restore
}

** educf
** Weighted mean of educfyrs, using rows with bord==1 only. This -keep- is not
** wrapped in preserve/restore, so it stays in force to the end of the do-file.

keep if bord==1

* Every table below is built on a preserved copy of the data and listed in full.

* all women: by country, by country and year, by year
foreach grp in "country" "country yearc" "yearc" {
	preserve
	collapse (mean) educfyrs if 1 [pweight = sweight], by(`grp')
	list
	restore
}

* height-group indicator variables, in the order the tables are produced
local hgroups tall1 tall2 tallhalf short1 short2 shorthalf

* one pooled row per height group
foreach g of local hgroups {
	preserve
	collapse (mean) educfyrs if `g' == 1 [pweight = sweight]
	list
	restore
}

* by country within each height group
foreach g of local hgroups {
	preserve
	collapse (mean) educfyrs if `g' == 1 [pweight = sweight], by(country)
	list
	restore
}

log close
exit